package com.kdtech.analyse.video;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.kdtech.analyse.AnalyseNews;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.StringUtils;
/**
 * Analyser for the Ifeng (Phoenix) video site, http://v.ifeng.com/.
 *
 * <p>Recognises video detail-page URLs and extracts the title, date,
 * description and source/author of a video from the page HTML.
 *
 * @author allen
 */
public class IfengAnalyse implements AnalyseNews {

	/**
	 * Shape of a video detail-page URL: some path, a numeric directory, then a
	 * five-part hyphen-separated id of digits/lower-case letters followed by
	 * ".shtml" and an optional suffix.
	 *
	 * <p>Compiled once instead of on every call, and the literal dots are
	 * escaped so that they only match a real '.' character.
	 */
	private static final Pattern DETAIL_PAGE_URL = Pattern.compile(
			"http://v\\.ifeng\\.com/.*/[0-9]+/[0-9a-z]+[-][0-9a-z]+[-][0-9a-z]+[-][0-9a-z]+[-][0-9a-z]+\\.shtml.*");

	/** CSS selectors tried in order when looking for the date text. */
	private static final String[] DATE_SELECTORS = {
			"div.modtit > div.titleft > p",
			"div.playerinfo p",
			"h1#js_video_title",
	};

	/**
	 * Tells whether the given URL has the shape of a video detail page.
	 *
	 * @param url the URL to test; may be {@code null}
	 * @return {@code true} if the whole URL matches the detail-page pattern,
	 *         {@code false} otherwise (including for {@code null})
	 */
	public boolean isDetailPage(String url) {
		return url != null && DETAIL_PAGE_URL.matcher(url).matches();
	}

	/**
	 * Extracts the video metadata (URL, title, description, date, author)
	 * from a fetched page.
	 *
	 * <p>The URL is not required to satisfy {@link #isDetailPage(String)};
	 * extraction is attempted for any page that has HTML.
	 *
	 * @param urlMeta the fetched page; its HTML and URL are read
	 * @return the populated metadata, or {@code null} when {@code urlMeta} is
	 *         {@code null} or carries no HTML (there is nothing to parse)
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		// Without HTML there is nothing to parse; bail out instead of letting
		// Jsoup fail on a null input.
		if (urlMeta == null || urlMeta.getHtml() == null) {
			return null;
		}

		String url = urlMeta.getUrl();
		Document doc = Jsoup.parse(urlMeta.getHtml());

		NewsMeta video = new NewsMeta();
		video.setUrl(url);
		video.setTitle(StringUtils.trimSpace(extractTitle(doc)));
		video.setContent(doc.select("p.inforp").text());
		video.setDate(extractDate(doc, url));
		video.setAuthor(extractAuthor(doc));
		return video;
	}

	/**
	 * Reads the title as plain text, from the most specific selector to the
	 * most generic one, stopping at the first non-blank result.
	 */
	private static String extractTitle(Document doc) {
		String title = doc.select("div.modtit > div.titleft > h1").text();
		if (StringUtils.isBlank(title)) {
			// text() rather than html(): the title must not carry markup or
			// escaped entities.
			title = doc.select("h1 span").text();
		}
		if (StringUtils.isBlank(title)) {
			title = doc.select("h1").text();
		}
		return title;
	}

	/**
	 * Finds the date in the page, trying each known location in turn and
	 * finally the URL itself.
	 *
	 * @return the first value {@code DateUtils.matchDate} yields, or
	 *         {@code null} if none of the sources produces one
	 */
	private static Long extractDate(Document doc, String url) {
		for (String selector : DATE_SELECTORS) {
			Long date = DateUtils.matchDate(doc.select(selector).text());
			if (date != null) {
				return date;
			}
		}
		if (url == null) {
			return null;
		}
		// Last resort: look for a date inside the URL.
		return DateUtils.matchDate(url);
	}

	/**
	 * Extracts the source/author: first from the player info paragraph
	 * (the text between the "来源:" and "发布:" markers), then from the
	 * column tag list.
	 */
	private static String extractAuthor(Document doc) {
		String author = doc.select("html body div#js_video_continner div.playerinfo p").text();
		if (author.indexOf("来源:") != -1) {
			// NOTE(review): assumes substringBetween returns null when the closing
			// marker is missing, like the Apache Commons method of the same name —
			// confirm against com.kdtech.utils.StringUtils. Treat that case as
			// "not found" instead of calling trim() on null.
			String source = StringUtils.substringBetween(author, "来源:", "发布:");
			author = (source == null) ? "" : source.trim();
		}
		if (StringUtils.isBlank(author)) {
			author = doc.select("div#js_info_continner div.infobox ul.columnstag li.typ02").text();
			author = author.replace("来   源：", "");
		}
		return author;
	}

	/**
	 * Not implemented for this site.
	 *
	 * @param meta ignored
	 * @return always {@code null}
	 */
	public NewsMeta Update(NewsMeta meta) {
		return null;
	}

	/**
	 * Manual smoke test: checks a sample URL against
	 * {@link #isDetailPage(String)}, obtains its {@code UrlMeta} through
	 * {@code CrawlHTML.responseToURL}, parses it and prints both results.
	 */
	public static void main(String[] args) throws Exception {
		IfengAnalyse a = new IfengAnalyse();
		String url = "http://gongyi.ifeng.com/a/20151106/41502320_0.shtml";
		System.out.println(a.isDetailPage(url));
		UrlMeta meta = CrawlHTML.responseToURL(url);
		NewsMeta parserHtml = a.parserHtml(meta);
		System.out.println(parserHtml);
	}

}
